{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 10 Analyzing mosaic plots"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%html\n",
""
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import plotly.graph_objects as go\n",
"import seaborn as sns\n",
"from statsmodels.graphics.mosaicplot import mosaic"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import findspark\n",
"\n",
"# findspark.init() is called before the pyspark imports below on purpose:\n",
"# it makes the local Spark installation importable (see the findspark docs).\n",
"findspark.init()\n",
"from pyspark.context import SparkContext\n",
"from pyspark.sql import functions as F\n",
"from pyspark.sql.session import SparkSession\n",
"\n",
"# Single local-mode session shared by the rest of the notebook.\n",
"spark = (\n",
"    SparkSession.builder\n",
"    .appName(\"statistics\")\n",
"    .master(\"local\")\n",
"    .getOrCreate()\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Source: [Khan Academy: Analyzing mosaic plots](https://www.khanacademy.org/math/ap-statistics/analyzing-categorical-ap/xfb5d8e68:mosaic-plots/v/analyzing-mosaic-plots?modal=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Column-oriented survey counts: one entry per surveyee group, in matching order.\n",
"dataset = dict(\n",
"    surveyees=[\"Students\", \"Staff\", \"Parents\"],\n",
"    No=[800, 60, 150],\n",
"    Yes=[200, 240, 150],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" No | \n",
" Yes | \n",
"
\n",
" \n",
" surveyees | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" Students | \n",
" 800 | \n",
" 200 | \n",
"
\n",
" \n",
" Staff | \n",
" 60 | \n",
" 240 | \n",
"
\n",
" \n",
" Parents | \n",
" 150 | \n",
" 150 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" No Yes\n",
"surveyees \n",
"Students 800 200\n",
"Staff 60 240\n",
"Parents 150 150"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Index rows by surveyee group so the No/Yes counts are the only data columns.\n",
"df = pd.DataFrame(data=dataset).set_index(keys=\"surveyees\")\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+---------+---+---+\n",
"|surveyees| No|Yes|\n",
"+---------+---+---+\n",
"| Students|800|200|\n",
"| Staff| 60|240|\n",
"| Parents|150|150|\n",
"+---------+---+---+\n",
"\n"
]
}
],
"source": [
"# Transpose the column-oriented dict into one row tuple per surveyee group;\n",
"# the dict keys become the Spark column names.\n",
"sdf = spark.createDataFrame(\n",
"    data=list(zip(*dataset.values())),\n",
"    schema=list(dataset),\n",
")\n",
"sdf.show()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(,\n",
" {('Students', 'No'): (0.0, 0.0, 0.6188118811881188, 0.7973421926910299),\n",
" ('Students', 'Yes'): (0.0,\n",
" 0.8006644518272424,\n",
" 0.6188118811881188,\n",
" 0.19933554817275742),\n",
" ('Staff', 'No'): (0.6237623762376238,\n",
" 0.0,\n",
" 0.18564356435643564,\n",
" 0.19933554817275748),\n",
" ('Staff', 'Yes'): (0.6237623762376238,\n",
" 0.2026578073089701,\n",
" 0.18564356435643564,\n",
" 0.7973421926910299),\n",
" ('Parents', 'No'): (0.8143564356435643,\n",
" 0.0,\n",
" 0.18564356435643564,\n",
" 0.4983388704318937),\n",
" ('Parents', 'Yes'): (0.8143564356435643,\n",
" 0.5016611295681063,\n",
" 0.18564356435643564,\n",
" 0.4983388704318937)})"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# df.stack() yields a Series keyed by (surveyee group, answer), which mosaic() plots directly.\n",
"# The trailing semicolon keeps the returned (figure, rectangles) tuple out of the cell output,\n",
"# so only the rendered plot is shown.\n",
"mosaic(df.stack(), title=\"Survey responses by surveyee group\");"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
},
"toc-autonumbering": false
},
"nbformat": 4,
"nbformat_minor": 4
}